import requests as rq
from bs4 import BeautifulSoup
# Module-level accumulator: gethtmldata() appends one list of strings
# (one parsed ranking-table row) per university; writedatatocsv-style
# output reads from it.
alllater=[]
# re =rq.get('https://www.baidu.com',timeout=30)
# print(re)
# print(re.status_code)
# print(re.text)
# print(re.encoding)
# re.encoding='utf-8'
# print(re.text)
# print(re.content)
# re.raise_for_status()
def gethtml(url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    On any request/HTTP failure, prints a notice and returns a single
    space (the original sentinel) so callers can still hand the result
    to BeautifulSoup without crashing.
    """
    try:
        # `resp` instead of `re`: the original name shadowed the stdlib
        # `re` module.
        resp = rq.get(url, timeout=30)
        resp.raise_for_status()
        # Force UTF-8 — the site serves UTF-8 but may not declare it.
        resp.encoding = 'utf-8'
        return resp.text
    except rq.RequestException:
        # Narrowed from a bare `except:`, which would also have swallowed
        # KeyboardInterrupt/SystemExit and hidden programming errors.
        print('有异常')
        return ' '
def gethtmldata(soup):
    """Parse the ranking table from *soup* into the module-level ``alllater``.

    Each <tr> with <td> cells becomes one list of strings (presumably
    rank, university name, province, type, score — verify against the
    live page). Header rows (<th> only) are skipped.
    """
    # A bare string for `attrs` is BeautifulSoup shorthand for matching
    # by CSS class, i.e. <table class="rk-table">.
    table = soup.find('table', attrs='rk-table')
    if table is None:
        # Fetch failed (gethtml returned the ' ' sentinel) or the page
        # layout changed — previously this crashed with AttributeError.
        return
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if not tds:
            # Header rows carry <th> cells, not <td> — nothing to collect.
            continue
        row = []
        for i, td in enumerate(tds):
            if i == 1:
                # The university name sits inside an <a> tag.
                row.append(td.a.string)
            elif i in (2, 3):
                # These cells have extra child nodes; take only the
                # leading text node.
                row.append(td.contents[0].strip())
            else:
                row.append(td.string.strip())
        alllater.append(row)

def writedatatocsy(rows=None, path='data.csv'):
    """Write collected ranking rows to *path* as comma-separated lines.

    Parameters (both new, defaulted for backward compatibility):
        rows: iterable of string lists; defaults to the module-level
              ``alllater`` accumulator.
        path: output filename, defaults to 'data.csv'.

    NOTE: fields are joined naively — a value containing a comma would
    corrupt its row; the stdlib ``csv`` module would quote correctly.
    """
    if rows is None:
        rows = alllater
    # Explicit UTF-8 so the output does not depend on the platform's
    # default locale encoding (previously unspecified).
    with open(path, 'w', encoding='utf-8') as f:
        for line in rows:
            f.write(','.join(line) + '\n')
# --- script entry: fetch the 2021 BCUR ranking, parse it, dump to CSV ---
url = 'https://www.shanghairanking.cn/rankings/bcur/2021'
page = gethtml(url)
soup = BeautifulSoup(page, 'html.parser')
gethtmldata(soup)
writedatatocsy()

# print(soup)
# print(soup.head)
# print(soup.title)
# print(soup.body)
# print(soup.p)
# for txt in soup.stripped_strings:
#     print(txt)
# print(soup.title.name)  # the tag's name
# print(soup.a.attrs)  # attributes of the first <a> tag, as a dict
# print(soup.a.contents)  # all children of the first <a> tag, as a list
# print(soup.a.string)
# find(tag_name, attrs): return the FIRST matching tag in the HTML document
# print(soup.find('a'))
# find_all(tag_name, attrs): return ALL matching tags, as a list
# print(soup.find_all('a'))